Data Descriptives: DV

# Five-number summary (plus mean) of the dependent variable, female LFP.
summary(df[["lfp_female"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    12.7    56.8    62.1    61.9    67.6   100.0
# Histogram of female LFP, with a red vertical line at the sample mean.
hist(
  x = df[["lfp_female"]],
  breaks = 20,
  main = "Female LFP in Canada",
  xlab = "Female LFP %"
)
abline(v = mean(df[["lfp_female"]]), col = "red")

# Pick up to six regions at random among those having at least one row with
# population above 16,000, then plot female LFP for each selected region.
#
# The filtering is NA-safe: a missing `population` or `region_name` would
# otherwise become an NA subscript, which yields all-NA rows (or an NA
# "region") instead of being skipped.
# NOTE(review): the draw is not reproducible unless a seed is set elsewhere
# in the document - confirm.
candidate_regs <- unique(
  df$region_name[!is.na(df$population) & df$population > 16000]
)
candidate_regs <- candidate_regs[!is.na(candidate_regs)]

# Index with sample.int() rather than calling sample() on the values: the
# draw is the same, but a single numeric candidate is not expanded to 1:x,
# and the size is capped so fewer than six candidates is not an error.
random_regs <- candidate_regs[
  sample.int(length(candidate_regs), size = min(6L, length(candidate_regs)))
]

for (region in random_regs) {
  # which() drops rows whose region_name is NA instead of returning NA rows.
  region_data <- df[which(df$region_name == region), ]
  # Single-bracket selection keeps a one-column object, so plot() dispatches
  # on df's own class.
  # NOTE(review): if `df` is an sf object this draws a map - confirm.
  plot(region_data["lfp_female"], main = region)
}

Data Descriptives: IV

##                                    [,1]
## pca1_stock                    2.2266596
## avg_hh_size                   2.5163871
## med_hh_income_1000           78.2328457
## avg_rooms_per_dwelling        6.1683318
## percent_hh_with_children      0.4089649
## lfp_male                     70.1691613
## percent_drivers_female        0.4434255
## percent_publictransit_female  0.5712643

Study 1: Gender differences in spatial behaviour

Commuting Modes

\[\begin{aligned} H_0 &: \text{driver}_{F} \geq \text{driver}_M \\ H_1 &: \text{driver}_{F} < \text{driver}_M \end{aligned}\]

# One-sided paired t-test of H1: female drivers < male drivers.
# Assumes row i of both tables describes the same area - TODO confirm.
# `var.equal` is omitted on purpose: t.test() ignores it when paired = TRUE,
# so passing it only suggested a pooled-variance test that was never run.
t.test(
  commute_modes_female$driver,
  commute_modes_male$driver,
  alternative = "less",
  paired = TRUE
)
## 
##  Paired t-test
## 
## data:  commute_modes_female$driver and commute_modes_male$driver
## t = -81.425, df = 5424, p-value < 2.2e-16
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##      -Inf -150.698
## sample estimates:
## mean of the differences 
##               -153.8055

Reject the null hypothesis: fewer women than men commute as drivers.

\[\begin{aligned} H_0 &: \text{transit}_{F} \leq \text{transit}_M \\ H_1 &: \text{transit}_{F} > \text{transit}_M \end{aligned}\]

# One-sided paired t-test of H1: female transit commuters > male transit
# commuters.
# Assumes row i of both tables describes the same area - TODO confirm.
# `var.equal` is omitted on purpose: t.test() ignores it when paired = TRUE,
# so passing it only suggested a pooled-variance test that was never run.
t.test(
  commute_modes_female$transit,
  commute_modes_male$transit,
  alternative = "greater",
  paired = TRUE
)
## 
##  Paired t-test
## 
## data:  commute_modes_female$transit and commute_modes_male$transit
## t = 48.281, df = 5424, p-value < 2.2e-16
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  49.17677      Inf
## sample estimates:
## mean of the differences 
##                50.91152

Reject the null hypothesis: more women than men commute by public transit.

Commuting Durations

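Paired t-tests: in every commute-duration bracket the difference between female and male commuters is statistically significant. Women outnumber men in the shortest bracket (`t15`) and are outnumbered by men in every longer bracket.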

# Two-sided paired t-test, female vs. male, on the `t15` commute-time column.
# `var.equal` is omitted on purpose: t.test() ignores it when paired = TRUE.
t.test(
  commute_time_female$t15,
  commute_time_male$t15,
  paired = TRUE
)
## 
##  Paired t-test
## 
## data:  commute_time_female$t15 and commute_time_male$t15
## t = 40.692, df = 5424, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  37.10181 40.85763
## sample estimates:
## mean of the differences 
##                38.97972
# Two-sided paired t-test, female vs. male, on the `t15to29` commute-time
# column.
# `var.equal` is omitted on purpose: t.test() ignores it when paired = TRUE.
t.test(
  commute_time_female$t15to29,
  commute_time_male$t15to29,
  paired = TRUE
)
## 
##  Paired t-test
## 
## data:  commute_time_female$t15to29 and commute_time_male$t15to29
## t = -20.524, df = 5424, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -23.36934 -19.29425
## sample estimates:
## mean of the differences 
##                -21.3318
# Two-sided paired t-test, female vs. male, on the `t30to44` commute-time
# column.
# `var.equal` is omitted on purpose: t.test() ignores it when paired = TRUE.
t.test(
  commute_time_female$t30to44,
  commute_time_male$t30to44,
  paired = TRUE
)
## 
##  Paired t-test
## 
## data:  commute_time_female$t30to44 and commute_time_male$t30to44
## t = -56.133, df = 5424, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -51.19870 -47.74324
## sample estimates:
## mean of the differences 
##               -49.47097
# Two-sided paired t-test, female vs. male, on the `t45to59` commute-time
# column.
# `var.equal` is omitted on purpose: t.test() ignores it when paired = TRUE.
t.test(
  commute_time_female$t45to59,
  commute_time_male$t45to59,
  paired = TRUE
)
## 
##  Paired t-test
## 
## data:  commute_time_female$t45to59 and commute_time_male$t45to59
## t = -30.318, df = 5424, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -19.02849 -16.71713
## sample estimates:
## mean of the differences 
##               -17.87281
# Two-sided paired t-test, female vs. male, on the `t60` commute-time column.
# `var.equal` is omitted on purpose: t.test() ignores it when paired = TRUE.
t.test(
  commute_time_female$t60,
  commute_time_male$t60,
  paired = TRUE
)
## 
##  Paired t-test
## 
## data:  commute_time_female$t60 and commute_time_male$t60
## t = -34.61, df = 5424, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -28.42807 -25.38022
## sample estimates:
## mean of the differences 
##               -26.90415

Study 2: determinants of LFP

# Assemble the regression frame: the predictor columns selected by
# `iv_colnames`, with the response (female LFP) appended as the last column.
df_vars <- df_no_geom[iv_colnames]
df_vars[["lfp_female"]] <- df_no_geom[["lfp_female"]]

# Full linear model: female LFP regressed on every other column of df_vars.
model_all <- lm(
  formula = lfp_female ~ .,
  data = df_vars
)
summary(model_all)
## 
## Call:
## lm(formula = lfp_female ~ ., data = df_vars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -16.894  -2.413   0.032   2.332  46.788 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -19.534571   0.928780 -21.032  < 2e-16 ***
## pca1_stock                    -0.311675   0.039182  -7.955 2.17e-15 ***
## avg_hh_size                   -1.242855   0.301665  -4.120 3.85e-05 ***
## med_hh_income_1000            -0.014824   0.003476  -4.265 2.03e-05 ***
## avg_rooms_per_dwelling         0.313202   0.091063   3.439 0.000587 ***
## percent_hh_with_children       1.601108   1.110579   1.442 0.149448    
## lfp_male                       0.989699   0.008558 115.652  < 2e-16 ***
## percent_drivers_female        29.050132   1.316910  22.059  < 2e-16 ***
## percent_publictransit_female   2.632025   0.392777   6.701 2.28e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.942 on 5416 degrees of freedom
## Multiple R-squared:  0.7725, Adjusted R-squared:  0.7722 
## F-statistic:  2299 on 8 and 5416 DF,  p-value: < 2.2e-16
# Residuals of the full model plotted against their index.
plot(
  model_all[["residuals"]],
  main = "LFP ~ . : residuals",
  ylab = "residual"
)

# Nested-model comparison: refit the full model without `pca1_stock`, then
# compare the reduced and full fits with anova().
model_no_sndi <- lm(
  formula = lfp_female ~ . - pca1_stock,
  data = df_vars
)
anova(model_no_sndi, model_all)

LFP ~ SNDI + SNDI^2

# Second-degree polynomial fit of female LFP on `pca1_stock` alone.
# poly() is called with its defaults, i.e. orthogonal polynomial terms, so the
# coefficients are not on the raw x / x^2 scale.
quadratic_model <- lm(
  formula = lfp_female ~ poly(pca1_stock, 2),
  data = df_vars
)
summary(quadratic_model)
## 
## Call:
## lm(formula = lfp_female ~ poly(pca1_stock, 2), data = df_vars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -48.879  -5.088   0.157   5.696  38.325 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           61.9024     0.1120 552.916  < 2e-16 ***
## poly(pca1_stock, 2)1   0.4185     8.2461   0.051     0.96    
## poly(pca1_stock, 2)2  36.8133     8.2461   4.464 8.19e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.246 on 5422 degrees of freedom
## Multiple R-squared:  0.003663,   Adjusted R-squared:  0.003295 
## F-statistic: 9.966 on 2 and 5422 DF,  p-value: 4.782e-05
# Scatter of female LFP against `pca1_stock` with the fitted quadratic curve
# overlaid (no confidence band).
# `se = FALSE` is spelled out: `F` is an ordinary variable that can be
# reassigned, so relying on it as a logical constant is fragile.
ggplot(df_vars, aes(x = pca1_stock, y = lfp_female)) +
  geom_point() +
  stat_smooth(se = FALSE, method = "lm", formula = y ~ poly(x, 2)) +
  labs(title = "LFP ~ SNDI + SNDI^2", y = "Female LFP (%)", x = "SNDI")

Montreal

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   31.70   55.45   60.75   61.77   68.40   84.50
## 
## Call:
## lm(formula = lfp_female ~ ., data = mtl_data_reg)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.9560  -2.6888  -0.1098   2.8328  14.6956 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -46.77406    4.52755 -10.331  < 2e-16 ***
## pca1_stock                    -0.95257    0.20592  -4.626 4.86e-06 ***
## avg_hh_size                   12.59185    2.34778   5.363 1.30e-07 ***
## med_hh_income_1000            -0.04236    0.01903  -2.226 0.026529 *  
## avg_rooms_per_dwelling         1.83731    0.55175   3.330 0.000939 ***
## percent_hh_with_children     -53.81883    6.58213  -8.177 2.91e-15 ***
## lfp_male                       1.01565    0.03206  31.679  < 2e-16 ***
## percent_drivers_female        21.55553    3.95562   5.449 8.28e-08 ***
## percent_publictransit_female  24.94393    3.44850   7.233 2.00e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.344 on 457 degrees of freedom
## Multiple R-squared:  0.7965, Adjusted R-squared:  0.7929 
## F-statistic: 223.6 on 8 and 457 DF,  p-value: < 2.2e-16

Toronto

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    38.4    54.3    59.7    60.3    65.7    93.0
## 
## Call:
## lm(formula = lfp_female ~ ., data = to_data_reg)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.3944  -2.3390   0.0719   2.4194  20.6767 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -22.67732    3.28606  -6.901 1.41e-11 ***
## pca1_stock                    -0.66500    0.14723  -4.517 7.68e-06 ***
## avg_hh_size                   -0.99671    0.92902  -1.073 0.283800    
## med_hh_income_1000            -0.03934    0.01032  -3.812 0.000153 ***
## avg_rooms_per_dwelling         1.30731    0.30125   4.340 1.70e-05 ***
## percent_hh_with_children     -10.91745    3.30729  -3.301 0.001025 ** 
## lfp_male                       0.97139    0.03066  31.681  < 2e-16 ***
## percent_drivers_female        17.83610    3.46265   5.151 3.60e-07 ***
## percent_publictransit_female  22.22891    2.98261   7.453 3.52e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.861 on 556 degrees of freedom
## Multiple R-squared:  0.7987, Adjusted R-squared:  0.7959 
## F-statistic: 275.8 on 8 and 556 DF,  p-value: < 2.2e-16